library(dplyr)
library(plotly)
library(ggplot2)
library(readxl)
library(tidyr)
library(knitr)
library(kableExtra)
library(TTR)
library(data.table)
library(caret)
set.seed(23)
Gold.prices <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Gold prices.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 2),
rep("NULL", 4)),
col.names = c('Date',
'Morning.Fix.USD',
'Afternoon.Fix.USD',
rep("NULL", 4)),
header = TRUE)
Gold.prices <- Gold.prices %>%
mutate(Morning.Fix.USD = coalesce(Morning.Fix.USD, Afternoon.Fix.USD),
Afternoon.Fix.USD = coalesce(Afternoon.Fix.USD, Morning.Fix.USD),
USD = (Morning.Fix.USD + Afternoon.Fix.USD) / 2)
Gold.prices.yearly <- Gold.prices %>%
mutate(Year = as.numeric(substr(Date, 1, 4))) %>%
group_by(Year) %>%
summarize( USD = (mean(Morning.Fix.USD) + mean(Afternoon.Fix.USD)) / 2)
Zbiór zawiera codzienne wyceny złota podczas sesji otwarcia i zamknięcia od dnia 1968-01-02 do 2021-09-29.
head(Gold.prices) %>%
kable() %>%
kable_styling("striped", full_width = F, position = 'left')
| Date | Morning.Fix.USD | Afternoon.Fix.USD | USD |
|---|---|---|---|
| 2021-09-29 | 1741.65 | 1737.15 | 1739.400 |
| 2021-09-28 | 1739.65 | 1733.75 | 1736.700 |
| 2021-09-27 | 1749.15 | 1755.30 | 1752.225 |
| 2021-09-24 | 1755.15 | 1746.80 | 1750.975 |
| 2021-09-23 | 1771.05 | 1750.00 | 1760.525 |
| 2021-09-22 | 1775.35 | 1773.40 | 1774.375 |
data.frame(nrow(Gold.prices)) %>%
rename("Liczba próbek" = 1) %>%
kable() %>%
kable_styling(full_width = FALSE, position = 'left')
| Liczba próbek |
|---|
| 13585 |
summary(Gold.prices) %>%
kable() %>%
kable_styling("striped", full_width = F, position = 'left')
| Date | Morning.Fix.USD | Afternoon.Fix.USD | USD | |
|---|---|---|---|---|
| Min. :1968-01-02 | Min. : 34.77 | Min. : 34.75 | Min. : 34.76 | |
| 1st Qu.:1981-06-10 | 1st Qu.: 280.50 | 1st Qu.: 280.30 | 1st Qu.: 280.27 | |
| Median :1994-11-14 | Median : 383.30 | Median : 383.45 | Median : 383.38 | |
| Mean :1994-11-16 | Mean : 575.17 | Mean : 574.98 | Mean : 575.07 | |
| 3rd Qu.:2008-04-23 | 3rd Qu.: 841.75 | 3rd Qu.: 838.25 | 3rd Qu.: 841.00 | |
| Max. :2021-09-29 | Max. :2061.50 | Max. :2067.15 | Max. :2058.15 |
Na interaktywnym wykresie widzimy zmianę cen otwarcia w czasie.
p <- ggplot(Gold.prices, aes(x = Date)) +
geom_point(aes(y = USD), color = "gold")
ggplotly(p)
World_Development_Indicators <- read_excel("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/World_Development_Indicators.xlsx",
na = '..',
range = "A1:BC44305")
Country Name, pozostawienie statystyk dla całego swiata w zbiorze krajów,World_Development_Indicators <- World_Development_Indicators %>%
filter(!`Country Name` %in% c("Low & middle income","Low income","Lower middle income","Middle income","Upper middle income","High income"))
World_Development_Indicators.Series_Codes <- select(World_Development_Indicators, `Series Name`, `Series Code`)
World_Development_Indicators <- select(World_Development_Indicators, -`Series Code`)
World_Development_Indicators <- World_Development_Indicators %>%
pivot_longer(cols = `1970 [YR1970]`:`2020 [YR2020]`, names_to = "Year") %>%
group_by(`Series Name`) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = `Series Name`, values_from = value) %>%
select(-row)
World_Development_Indicators <- World_Development_Indicators %>%
mutate(Year = as.numeric(substr(Year, 1, 4)))
country.count <- length(unique(World_Development_Indicators$`Country Name`)) - 1
indicators.count <- World_Development_Indicators %>%
select(-`Country Name`, -`Country Code`, -Year) %>%
ncol
WDI_summary <- data.frame(country.count, indicators.count) %>%
rename("Liczba krajów" = country.count,
"Liczba wskaźników" = indicators.count)
kable(WDI_summary) %>%
kable_styling(full_width = FALSE, position = 'left')
| Liczba krajów | Liczba wskaźników |
|---|---|
| 201 | 213 |
| Lista krajów dostępnych w zbiorze |
|---|
| Afghanistan |
| Albania |
| Algeria |
| American Samoa |
| Andorra |
| Angola |
| Antigua and Barbuda |
| Argentina |
| Armenia |
| Aruba |
| Australia |
| Austria |
| Azerbaijan |
| Bahamas, The |
| Bahrain |
| Bangladesh |
| Barbados |
| Belarus |
| Belgium |
| Belize |
| Benin |
| Bermuda |
| Bhutan |
| Bolivia |
| Brazil |
| British Virgin Islands |
| Bulgaria |
| Burundi |
| Cambodia |
| Cameroon |
| Canada |
| Cayman Islands |
| Central African Republic |
| Chad |
| Channel Islands |
| Chile |
| China |
| Colombia |
| Comoros |
| Congo, Dem. Rep. |
| Congo, Rep. |
| Costa Rica |
| Croatia |
| Cuba |
| Curacao |
| Cyprus |
| Czech Republic |
| Denmark |
| Djibouti |
| Dominica |
| Dominican Republic |
| Ecuador |
| Egypt, Arab Rep. |
| El Salvador |
| Equatorial Guinea |
| Eritrea |
| Estonia |
| Eswatini |
| Ethiopia |
| Faroe Islands |
| Fiji |
| Finland |
| France |
| French Polynesia |
| Gabon |
| Gambia, The |
| Georgia |
| Germany |
| Ghana |
| Gibraltar |
| Greece |
| Greenland |
| Grenada |
| Guam |
| Guatemala |
| Guinea |
| Guinea-Bissau |
| Guyana |
| Haiti |
| Honduras |
| Hong Kong SAR, China |
| Hungary |
| Iceland |
| India |
| Indonesia |
| Iran, Islamic Rep. |
| Iraq |
| Ireland |
| Isle of Man |
| Israel |
| Italy |
| Jamaica |
| Japan |
| Jordan |
| Kazakhstan |
| Kenya |
| Kiribati |
| Korea, Dem. People’s Rep. |
| Korea, Rep. |
| Kosovo |
| Kuwait |
| Kyrgyz Republic |
| Lao PDR |
| Latvia |
| Lebanon |
| Lesotho |
| Liberia |
| Libya |
| Liechtenstein |
| Lithuania |
| Luxembourg |
| Macao SAR, China |
| Madagascar |
| Malawi |
| Malaysia |
| Maldives |
| Mali |
| Malta |
| Marshall Islands |
| Mauritania |
| Mauritius |
| Mexico |
| Micronesia, Fed. Sts. |
| Moldova |
| Monaco |
| Mongolia |
| Montenegro |
| Morocco |
| Mozambique |
| Myanmar |
| Namibia |
| Nepal |
| Netherlands |
| New Caledonia |
| New Zealand |
| Nicaragua |
| Niger |
| Nigeria |
| North Macedonia |
| Norway |
| Oman |
| Pakistan |
| Panama |
| Papua New Guinea |
| Paraguay |
| Peru |
| Philippines |
| Poland |
| Portugal |
| Puerto Rico |
| Qatar |
| Romania |
| Russian Federation |
| Rwanda |
| San Marino |
| Sao Tome and Principe |
| Saudi Arabia |
| Senegal |
| Serbia |
| Seychelles |
| Sierra Leone |
| Singapore |
| Sint Maarten (Dutch part) |
| Slovak Republic |
| Slovenia |
| Solomon Islands |
| South Africa |
| South Sudan |
| Spain |
| St. Vincent and the Grenadines |
| Sudan |
| Suriname |
| Sweden |
| Switzerland |
| Syrian Arab Republic |
| Tajikistan |
| Tanzania |
| Thailand |
| Togo |
| Tonga |
| Trinidad and Tobago |
| Tunisia |
| Turkey |
| Turks and Caicos Islands |
| Tuvalu |
| Uganda |
| Ukraine |
| United Arab Emirates |
| United Kingdom |
| United States |
| Uruguay |
| Uzbekistan |
| Vanuatu |
| Venezuela, RB |
| Vietnam |
| Virgin Islands (U.S.) |
| West Bank and Gaza |
| Yemen, Rep. |
| Zambia |
| Zimbabwe |
| Bosnia and Herzegovina |
| World |
| Lista dostępnych wskaźników |
|---|
| Country Name |
| Country Code |
| Year |
| Urban population growth (annual %) |
| Urban population (% of total population) |
| Value lost due to electrical outages (% of sales for affected firms) |
| Urban population |
| Urban land area (sq. km) |
| Unemployment, total (% of total labor force) (national estimate) |
| Unemployment with advanced education (% of total labor force with advanced education) |
| Transport services (% of commercial service exports) |
| Transport services (% of commercial service imports) |
| Trained teachers in upper secondary education (% of total teachers) |
| Trained teachers in secondary education (% of total teachers) |
| Trained teachers in primary education (% of total teachers) |
| Trademark applications, direct nonresident |
| Trade in services (% of GDP) |
| Trade (% of GDP) |
| Trademark applications, direct resident |
| Trademark applications, total |
| Total natural resources rents (% of GDP) |
| Total greenhouse gas emissions (kt of CO2 equivalent) |
| Total greenhouse gas emissions (% change from 1990) |
| Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age) |
| Time required to build a warehouse (days) |
| Time required to enforce a contract (days) |
| Time required to get electricity (days) |
| Taxes on goods and services (current LCU) |
| Taxes on income, profits and capital gains (% of revenue) |
| Taxes on income, profits and capital gains (% of total taxes) |
| Taxes on income, profits and capital gains (current LCU) |
| Taxes on international trade (% of revenue) |
| Taxes on international trade (current LCU) |
| Taxes on goods and services (% value added of industry and services) |
| Taxes on goods and services (% of revenue) |
| Taxes on exports (current LCU) |
| Taxes on exports (% of tax revenue) |
| Taxes less subsidies on products (current US\() </td> </tr> <tr> <td style="text-align:left;"> Taxes less subsidies on products (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Taxes less subsidies on products (constant LCU) </td> </tr> <tr> <td style="text-align:left;"> Tax revenue (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Tax revenue (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Tax payments (number) </td> </tr> <tr> <td style="text-align:left;"> Survival to age 65, female (% of cohort) </td> </tr> <tr> <td style="text-align:left;"> Survival to age 65, male (% of cohort) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate (per 100,000 population) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate, female (per 100,000 female population) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate, male (per 100,000 male population) </td> </tr> <tr> <td style="text-align:left;"> Stocks traded, turnover ratio of domestic shares (%) </td> </tr> <tr> <td style="text-align:left;"> Stocks traded, total value (current US\)) |
| Stocks traded, total value (% of GDP) |
| Strength of legal rights index (0=weak to 12=strong) |
| Short-term debt (% of total reserves) |
| Short-term debt (% of total external debt) |
| Short-term debt (% of exports of goods, services and primary income) |
| Share of youth not in education, employment or training, female (% of female youth population) |
| Share of youth not in education, employment or training, male (% of male youth population) |
| Share of youth not in education, employment or training, total (% of youth population) |
| Services, value added (% of GDP) |
| Service imports (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Service exports (BoP, current US\)) |
| Self-employed, male (% of male employment) (modeled ILO estimate) |
| Self-employed, total (% of total employment) (modeled ILO estimate) |
| Self-employed, female (% of female employment) (modeled ILO estimate) |
| Secure Internet servers |
| Secure Internet servers (per 1 million people) |
| Secondary education, teachers |
| Secondary education, pupils |
| Scientific and technical journal articles |
| School enrollment, tertiary (gross), gender parity index (GPI) |
| S&P Global Equity Indices (annual % change) |
| Rural population growth (annual %) |
| Rural population (% of total population) |
| Rural population |
| Researchers in R&D (per million people) |
| Research and development expenditure (% of GDP) |
| Renewable energy consumption (% of total final energy consumption) |
| Renewable internal freshwater resources per capita (cubic meters) |
| Renewable internal freshwater resources, total (billion cubic meters) |
| Renewable electricity output (% of total electricity output) |
| Real interest rate (%) |
| Pupil-teacher ratio, upper secondary |
| Pupil-teacher ratio, tertiary |
| Pupil-teacher ratio, secondary |
| Pupil-teacher ratio, primary |
| Pupil-teacher ratio, preprimary |
| Rail lines (total route-km) |
| Railways, goods transported (million ton-km) |
| Railways, passengers carried (million passenger-km) |
| Proportion of seats held by women in national parliaments (%) |
| Primary income payments (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Primary income receipts (BoP, current US\)) |
| Primary school starting age (years) |
| Prevalence of undernourishment (% of population) |
| Portfolio investment, net (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Portfolio investment, bonds (PPG + PNG) (NFL, current US\)) |
| Portfolio equity, net inflows (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Population, total </td> </tr> <tr> <td style="text-align:left;"> Population, male </td> </tr> <tr> <td style="text-align:left;"> Population, male (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population, female (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population, female </td> </tr> <tr> <td style="text-align:left;"> Population living in slums (% of urban population) </td> </tr> <tr> <td style="text-align:left;"> Population in urban agglomerations of more than 1 million </td> </tr> <tr> <td style="text-align:left;"> Population in the largest city (% of urban population) </td> </tr> <tr> <td style="text-align:left;"> Population in largest city </td> </tr> <tr> <td style="text-align:left;"> Population growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> Population density (people per sq. km of land area) </td> </tr> <tr> <td style="text-align:left;"> Population ages 65 and above (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population ages 15-64 (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population ages 0-14 (% of total population) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 air pollution, mean annual exposure (micrograms per cubic meter) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 air pollution, population exposed to levels exceeding WHO guideline value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-1 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-2 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-3 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> Part time employment, total (% of total employment) </td> </tr> <tr> <td style="text-align:left;"> Patent applications, nonresidents </td> </tr> <tr> <td style="text-align:left;"> Patent applications, residents </td> </tr> <tr> <td style="text-align:left;"> Number of under-five deaths </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions (% change from 1990) </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions (thousand metric tons of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions in energy sector (% of total) </td> </tr> <tr> <td style="text-align:left;"> Net primary income (Net income from abroad) (current US\)) |
| Net primary income (Net income from abroad) (current LCU) |
| Net primary income (Net income from abroad) (constant LCU) |
| Net primary income (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Net official development assistance received (current US\)) |
| Net official aid received (current US\() </td> </tr> <tr> <td style="text-align:left;"> Net domestic credit (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Net acquisition of financial assets (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Natural gas rents (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Mortality rate, infant (per 1,000 live births) </td> </tr> <tr> <td style="text-align:left;"> Mortality caused by road traffic injury (per 100,000 population) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions (% change from 1990) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions (kt of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions in energy sector (thousand metric tons of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Merchandise exports to high-income economies (% of total merchandise exports) </td> </tr> <tr> <td style="text-align:left;"> Manufacturing, value added (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Literacy rate, adult total (% of people ages 15 and above) </td> </tr> <tr> <td style="text-align:left;"> Life expectancy at birth, total (years) </td> </tr> <tr> <td style="text-align:left;"> Lending interest rate (%) </td> </tr> <tr> <td style="text-align:left;"> Land area (sq. km) </td> </tr> <tr> <td style="text-align:left;"> Labor force, total </td> </tr> <tr> <td style="text-align:left;"> International tourism, expenditures (current US\)) |
| International migrant stock (% of population) |
| Interest payments (% of expense) |
| Inflation, consumer prices (annual %) |
| Individuals using the Internet (% of population) |
| Income share held by highest 10% |
| Imports of goods and services (current US\() </td> </tr> <tr> <td style="text-align:left;"> Imports of goods and services (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> ICT goods exports (% of total goods exports) </td> </tr> <tr> <td style="text-align:left;"> Gross savings (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross national expenditure (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross national expenditure (current US\)) |
| Gross savings (current US\() </td> </tr> <tr> <td style="text-align:left;"> Gross domestic savings (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross domestic savings (current US\)) |
| Government expenditure on education, total (% of GDP) |
| Goods exports (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Goods imports (BoP, current US\)) |
| GNI growth (annual %) |
| GDP per capita (current US\() </td> </tr> <tr> <td style="text-align:left;"> GDP per capita growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> GDP growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> GDP (current US\)) |
| Fuel exports (% of merchandise exports) |
| Fuel imports (% of merchandise imports) |
| Food exports (% of merchandise exports) |
| Food imports (% of merchandise imports) |
| External debt stocks (% of GNI) |
| Exports of goods and services (current US\() </td> </tr> <tr> <td style="text-align:left;"> Exports of goods and services (annual % growth) </td> </tr> <tr> <td style="text-align:left;"> Expense (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Employment in industry (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employment in services (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employment in agriculture (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employers, total (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from renewable sources, excluding hydroelectric (kWh) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from renewable sources, excluding hydroelectric (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from oil, gas and coal sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from coal sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from hydroelectric sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from natural gas sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from nuclear sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Ease of doing business score (0 = lowest performance to 100 = best performance) </td> </tr> <tr> <td style="text-align:left;"> Diabetes prevalence (% of population ages 20 to 79) </td> </tr> <tr> <td style="text-align:left;"> Deposit interest rate (%) </td> </tr> <tr> <td style="text-align:left;"> Current health expenditure per capita (current US\)) |
| Current health expenditure (% of GDP) |
| Consumer price index (2010 = 100) |
| CO2 emissions from solid fuel consumption (% of total) |
| CO2 emissions from solid fuel consumption (kt) |
| CO2 emissions from transport (% of total fuel combustion) |
| CO2 intensity (kg per kg of oil equivalent energy use) |
| CO2 emissions from residential buildings and commercial and public services (% of total fuel combustion) |
| CO2 emissions from other sectors, excluding residential buildings and commercial and public services (% of total fuel combustion) |
| CO2 emissions from manufacturing industries and construction (% of total fuel combustion) |
| CO2 emissions from liquid fuel consumption (kt) |
| CO2 emissions from liquid fuel consumption (% of total) |
| CO2 emissions from gaseous fuel consumption (kt) |
| CO2 emissions from gaseous fuel consumption (% of total) |
| CO2 emissions from electricity and heat production, total (% of total fuel combustion) |
| CO2 emissions (metric tons per capita) |
| CO2 emissions (kt) |
| CO2 emissions (kg per PPP $ of GDP) |
| CO2 emissions (kg per 2017 PPP $ of GDP) |
| CO2 emissions (kg per 2010 US$ of GDP) |
| Birth rate, crude (per 1,000 people) |
| Bank capital to assets ratio (%) |
| Average number of visits or required meetings with tax officials (for affected firms) |
| Average precipitation in depth (mm per year) |
| Automated teller machines (ATMs) (per 100,000 adults) |
| Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+) |
| Access to electricity (% of population) |
SP.Composite <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/S&P Composite.csv")
SP.Composite <- SP.Composite %>%
rename(Date = Year,
CAPE = Cyclically.Adjusted.PE.Ratio) %>%
mutate(Date = as.Date(Date))
head(SP.Composite) %>%
kable() %>%
kable_styling("striped", full_width = F, position = 'left') %>%
scroll_box( width = '100%')
| Date | S.P.Composite | Dividend | Earnings | CPI | Long.Interest.Rate | Real.Price | Real.Dividend | Real.Earnings | CAPE |
|---|---|---|---|---|---|---|---|---|---|
| 2021-10-31 | 3700.650 | NA | NA | 260.1098 | 0.93 | 3700.650 | NA | NA | 33.73946 |
| 2021-09-30 | 4493.280 | NA | NA | 273.9832 | 1.29 | 4477.204 | NA | NA | 38.34228 |
| 2021-08-31 | 4454.206 | NA | NA | 273.6565 | 1.28 | 4443.570 | NA | NA | 38.09043 |
| 2021-07-31 | 4363.713 | NA | NA | 273.0030 | 1.32 | 4363.713 | NA | NA | 37.44349 |
| 2021-06-30 | 4238.490 | 57.86504 | 158.74 | 271.6960 | 1.52 | 4258.879 | 58.14340 | 159.5036 | 36.69631 |
| 2021-05-31 | 4167.850 | 57.78782 | 148.56 | 269.1950 | 1.62 | 4226.807 | 58.60528 | 150.6615 | 36.55215 |
data.frame(nrow(Gold.prices)) %>%
rename("Liczba próbek" = 1) %>%
kable() %>%
kable_styling(full_width = FALSE, position = 'left')
| Liczba próbek |
|---|
| 13585 |
summary(SP.Composite) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover"), position = 'left') %>%
scroll_box( width = '100%')
| Date | S.P.Composite | Dividend | Earnings | CPI | Long.Interest.Rate | Real.Price | Real.Dividend | Real.Earnings | CAPE | |
|---|---|---|---|---|---|---|---|---|---|---|
| Min. :1871-01-31 | Min. : 2.730 | Min. : 0.1800 | Min. : 0.1600 | Min. : 6.28 | Min. : 0.620 | Min. : 73.9 | Min. : 5.445 | Min. : 4.576 | Min. : 4.784 | |
| 1st Qu.:1908-10-07 | 1st Qu.: 7.902 | 1st Qu.: 0.4202 | 1st Qu.: 0.5608 | 1st Qu.: 10.20 | 1st Qu.: 3.171 | 1st Qu.: 186.6 | 1st Qu.: 9.417 | 1st Qu.: 14.063 | 1st Qu.:11.898 | |
| Median :1946-06-15 | Median : 17.370 | Median : 0.8717 | Median : 1.4625 | Median : 20.35 | Median : 3.815 | Median : 283.3 | Median :14.411 | Median : 23.524 | Median :16.381 | |
| Mean :1946-06-15 | Mean : 327.968 | Mean : 6.7321 | Mean : 15.3714 | Mean : 62.39 | Mean : 4.504 | Mean : 622.0 | Mean :17.498 | Mean : 34.907 | Mean :17.215 | |
| 3rd Qu.:1984-02-21 | 3rd Qu.: 164.400 | 3rd Qu.: 7.0525 | 3rd Qu.: 14.7258 | 3rd Qu.:102.28 | 3rd Qu.: 5.139 | 3rd Qu.: 707.0 | 3rd Qu.:22.301 | 3rd Qu.: 43.768 | 3rd Qu.:20.913 | |
| Max. :2021-10-31 | Max. :4493.280 | Max. :59.6800 | Max. :158.7400 | Max. :273.98 | Max. :15.320 | Max. :4477.2 | Max. :63.511 | Max. :159.504 | Max. :44.198 | |
| NA | NA | NA’s :4 | NA’s :4 | NA | NA | NA | NA’s :4 | NA’s :4 | NA’s :120 |
Currency.Exchange.Rates <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/CurrencyExchangeRates.csv")
Currency.Exchange.Rates <- Currency.Exchange.Rates %>%
mutate(Date = as.Date(Date))
Zbiór zawiera codzienny kurs wymiany walut od dnia 1995-01-02 do 2018-05-02.
Currency.Exchange.Rates %>%
names %>%
data.frame() %>%
rename('Dostępne waluty' = '.') %>%
kable %>%
kable_styling("striped", full_width = F, position = 'left') %>%
scroll_box(width = '400px', height = '400px')
| Dostępne waluty |
|---|
| Date |
| Algerian.Dinar |
| Australian.Dollar |
| Bahrain.Dinar |
| Bolivar.Fuerte |
| Botswana.Pula |
| Brazilian.Real |
| Brunei.Dollar |
| Canadian.Dollar |
| Chilean.Peso |
| Chinese.Yuan |
| Colombian.Peso |
| Czech.Koruna |
| Danish.Krone |
| Euro |
| Hungarian.Forint |
| Icelandic.Krona |
| Indian.Rupee |
| Indonesian.Rupiah |
| Iranian.Rial |
| Israeli.New.Sheqel |
| Japanese.Yen |
| Kazakhstani.Tenge |
| Korean.Won |
| Kuwaiti.Dinar |
| Libyan.Dinar |
| Malaysian.Ringgit |
| Mauritian.Rupee |
| Mexican.Peso |
| Nepalese.Rupee |
| New.Zealand.Dollar |
| Norwegian.Krone |
| Nuevo.Sol |
| Pakistani.Rupee |
| Peso.Uruguayo |
| Philippine.Peso |
| Polish.Zloty |
| Qatar.Riyal |
| Rial.Omani |
| Russian.Ruble |
| Saudi.Arabian.Riyal |
| Singapore.Dollar |
| South.African.Rand |
| Sri.Lanka.Rupee |
| Swedish.Krona |
| Swiss.Franc |
| Thai.Baht |
| Trinidad.And.Tobago.Dollar |
| Tunisian.Dinar |
| U.A.E..Dirham |
| U.K..Pound.Sterling |
| U.S..Dollar |
Bitcoin.prices <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-MKPRU.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'USD'))
Bitcoin.trade.volume <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-TRVOU.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'Trade Volume'))
Bitcoin.mine.difficulty <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-DIFF.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'Mine difficulty'))
Bitcoin.hash.rate <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-HRATE.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'Hash rate'))
Bitcoin <- Bitcoin.prices %>%
merge(Bitcoin.trade.volume, by = "Date") %>%
merge(Bitcoin.mine.difficulty, by = "Date") %>%
merge(Bitcoin.hash.rate, by = "Date")
Zestaw danych zawiera codzienne informacje od początku istnienia Bitcoina.
Znaczenie atrybutów: * USD - Bitcoin Market Price USD,Average USD market price across major bitcoin exchanges. * Trade volume - Bitcoin USD Exchange Trade Volume,The total USD value of trading volume on major bitcoin exchanges. * Mine difficulty - Bitcoin Difficulty,A relative measure of how difficult it is to find a new block. The difficulty is adjusted periodically as a function of how much hashing power has been deployed by the network of miners. * Hash rate - Bitcoin Hash Rate,The estimated number of tera hashes per second (trillions of hashes per second) the Bitcoin network is performing.
tail(Bitcoin)
## Date USD Trade.Volume Mine.difficulty Hash.rate
## 4654 2021-09-30 41522.38 221224597 1.899764e+13 161488615
## 4655 2021-10-01 43757.81 360342502 1.899764e+13 132212901
## 4656 2021-10-02 48140.11 688291407 1.899764e+13 177543039
## 4657 2021-10-03 47727.10 184243788 1.899764e+13 141656680
## 4658 2021-10-04 48205.72 183312374 1.900912e+13 147411968
## 4659 2021-10-05 49143.95 370887916 1.989305e+13 162177736
summary(Bitcoin)
## Date USD Trade.Volume
## Min. :2009-01-03 Min. : 0.00 Min. :0.000e+00
## 1st Qu.:2012-03-12 1st Qu.: 7.21 1st Qu.:1.948e+05
## Median :2015-05-21 Median : 431.89 Median :6.824e+06
## Mean :2015-05-21 Mean : 5132.38 Mean :1.467e+08
## 3rd Qu.:2018-07-28 3rd Qu.: 6496.35 3rd Qu.:1.484e+08
## Max. :2021-10-05 Max. :63554.44 Max. :5.352e+09
## Mine.difficulty Hash.rate
## Min. :0.000e+00 Min. : 0
## 1st Qu.:1.689e+06 1st Qu.: 12
## Median :4.881e+10 Median : 356089
## Mean :3.665e+12 Mean : 26458258
## 3rd Qu.:5.364e+12 3rd Qu.: 38265984
## Max. :2.505e+13 Max. :198514006
World.Population.Top10
## # A tibble: 561 x 5
## # Groups: Year [51]
## `Country Name` Year Population `Population in mln` Rank
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 China 1970 818315000 818. 11
## 2 India 1970 555189797 555. 10
## 3 United States 1970 205052000 205. 9
## 4 Russian Federation 1970 130404000 130. 8
## 5 Indonesia 1970 114793179 115. 7
## 6 Japan 1970 103403000 103. 6
## 7 Brazil 1970 95113265 95.1 5
## 8 Germany 1970 78169289 78.2 4
## 9 Bangladesh 1970 64232486 64.2 3
## 10 Pakistan 1970 58142062 58.1 2
## # ... with 551 more rows
Wniosek: Liczba samobójstw jest w trendzie spadkowym, zdrowie psychiczne ludzkości się polepsza.
gold <- Gold.prices %>%
select(Date, USD) %>%
arrange(desc(row_number())) %>%
filter( Date < '2021-09-29') %>%
filter( Date > '2016-01-01') %>%
rename( USD.gold = USD )
BTC_price <- Bitcoin.prices %>%
arrange(desc(row_number())) %>%
filter( Date < '2021-09-29') %>%
filter( Date > '2016-01-01') %>%
rename( USD.BTC = USD )
gold.btc <- gold %>%
inner_join(BTC_price, by = 'Date')
cor.gold.btc <- cor(gold.btc$USD.gold, gold.btc$USD.BTC, use = "complete.obs")
coeff.gold.btc <- max(gold.btc$USD.BTC)/max(gold.btc$USD.gold)
gold.btc %>%
ggplot( aes(x = Date) ) +
geom_line( aes(y = USD.gold), color = 'gold' ) +
geom_line( aes(y = USD.BTC/coeff.gold.btc), color = 'orange' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.btc, name="Cena BTC")
) +
ggtitle("Cena złota oraz BTC [USD]") +
theme_bw() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'orange', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'orange', size=13)
)
Wniosek: Pomimo korelacji na poziomie 0.68 wizualna inspekcja nie pokazuje jasnej zależności pomiędzy cenami porównywanych aktywów.
Gold.prices.monthly <- Gold.prices %>%
mutate(Date = substr(Date, 1, 7)) %>%
group_by(Date) %>%
summarize( USD = (mean(Morning.Fix.USD) + mean(Afternoon.Fix.USD)) / 2)
SP.df <- SP.Composite %>%
mutate(Date = substr(Date, 1, 7)) %>%
select(Date, S.P.Composite)
gold.SP <- Gold.prices.monthly %>%
inner_join(SP.df, by = 'Date') %>%
mutate(Date = as.Date(paste0(Date,'-01')))
cor(gold.SP$USD, gold.SP$S.P.Composite, use = "complete.obs")
## [1] 0.8181548
coeff.gold.SP <- max(gold.SP$S.P.Composite)/max(gold.SP$USD)
gold.SP %>%
ggplot( aes(x = Date) ) +
geom_line( aes(y = USD), color = 'gold' ) +
geom_line( aes(y = S.P.Composite/coeff.gold.SP), color = 'royalblue' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.SP, name="Cena S&P Composite")
) +
ggtitle("Cena złota oraz indeksu S&P Composite [USD]") +
theme_bw() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'royalblue', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'royalblue', size=13)
)
Wniosek: Pomimo korelacji na poziomie 0.82 wizualna inspekcja nie pokazuje jasnej zależności pomiędzy cenami porównywanych aktywów.
USA.WDI <- World_Development_Indicators %>%
filter(`Country Name` == 'United States') %>%
merge(Gold.prices.yearly, by = 'Year') %>%
select_if(~ !any(is.na(.)))
USA.WDI.to_cor <- USA.WDI %>%
select(-(1:3))
num_col=ncol(USA.WDI.to_cor[,-1])
out_indx <- which(upper.tri(diag(num_col)))
cor_cols <- USA.WDI.to_cor %>%
do(melt(cor(.[,-1], use="pairwise.complete.obs"), value.name="cor")[out_indx,])
cor_cols <- cor_cols %>%
filter(Var2 == 'USD') %>%
top_n(10) %>%
arrange(desc(cor)) %>%
rename(top_cor = cor)
cor_cols
## Var1 Var2 top_cor
## 1 Net primary income (BoP, current US$) USD 0.9375048
## 2 Service exports (BoP, current US$) USD 0.9017878
## 3 Goods exports (BoP, current US$) USD 0.8963826
## 4 Net domestic credit (current LCU) USD 0.8947741
## 5 Primary income receipts (BoP, current US$) USD 0.8925628
## 6 Service imports (BoP, current US$) USD 0.8868864
## 7 Goods imports (BoP, current US$) USD 0.8723380
## 8 GDP (current US$) USD 0.8627224
## 9 Primary income payments (BoP, current US$) USD 0.8473236
## 10 Population ages 65 and above (% of total population) USD 0.8421186
coeff.income_gold.price <- max(USA.WDI$`Net primary income (BoP, current US$)`)/max(USA.WDI$USD)
USA.WDI %>%
ggplot( aes(x = Year) ) +
geom_line( aes(y = USD), color = 'gold' ) +
geom_line( aes(y = `Net primary income (BoP, current US$)`/coeff.income_gold.price), color = 'royalblue' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.SP, name="Net primary income")
) +
ggtitle("Cena złota w porównaniu ze wskaźnikiem przychodu netto w USA") +
theme_bw() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'royalblue', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'royalblue', size=13)
)
coeff.service_export.price <- max(USA.WDI$`Service exports (BoP, current US$)`)/max(USA.WDI$USD)
USA.WDI %>%
ggplot( aes(x = Year) ) +
geom_line( aes(y = USD), color = 'gold' ) +
geom_line( aes(y = `Service exports (BoP, current US$)` / coeff.service_export.price), color = 'royalblue' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.SP, name="Service exports")
) +
ggtitle("Cena złota w porównaniu ze wskaźnikiem eksportu usług w USA") +
theme_bw() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'royalblue', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'royalblue', size=13)
)
Wnioski: Wskaźniki WDI są podawane rok rocznie. Próba korelacji różnych wskaźników WDI z ceną złota wskazuje głównie na wzrost gospodarczy. W próbie przewidywania ceny złota w ujęciu rocznym nie widzę sensu ze względu na zbyt małą ilość danych.
Zamiast przewidywać cenę złota, co w oparciu o posiadane dane będzie skazane na mizerny rezultat, stworzę model próbujący znaleźć w prostych wskaźnikach analizy technicznej dobre momenty do kupna lub sprzedaży złota.
Moja próba oparta bedzie o strategię średnich kroczących (ang. Moving averages). Metoda ta polega na obliczeniu średniej ceny złota z okresu czasu o długości n wstecz.
Na podstawie wielu średnich kroczących stworzony zostanie klasyfikator próbujący przewidzieć dobry moment do kupna lub sprzedaży kruszca.
Pierwszym krokiem jest subiektywne zaznaczenie okresów w których warto było poszerzać oraz zawężać ekspozycję swojego portfela inwestycyjnego na złoto.
Gold.prices %>%
mutate(exposition = 0) %>%
write_xlsx("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Gold_prices_to_mark_exposition.xlsx")
Odczytanie nowego zbioru:
Gold.prices <- read_excel("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Gold_prices_with_marked_exposition.xlsx")
Czyszczenie zbioru:
positive.exposition <- Gold.prices$exposition
positive.exposition[Gold.prices$exposition == -1] <- 0
negative.exposition <- Gold.prices$exposition
negative.exposition[Gold.prices$exposition == 1] <- 0
negative.exposition[Gold.prices$exposition == -1] <- 1
Gold.prices <- Gold.prices %>%
mutate('positive.exposition' = positive.exposition,
'negative.exposition' = negative.exposition,)
Wykres przedstawiający miejsca kupna oraz sprzedaży:
positive.diff <- diff(c(0, positive.exposition))
positive.starts <- Gold.prices$Date[positive.diff == 1]
positive.ends <- Gold.prices$Date[positive.diff == -1]
if (length(positive.starts) > length(positive.ends)) positive.ends <- c(positive.ends, tail(Gold.prices$Date, 1))
positive.sections <- data.frame(start=positive.starts, end=positive.ends, group=seq_along(positive.starts))
negative.diff <- diff(c(0, negative.exposition))
negative.starts <- Gold.prices$Date[negative.diff == 1]
negative.ends <- Gold.prices$Date[negative.diff == -1]
if (length(negative.starts) > length(negative.ends)) negative.ends <- c(negative.ends, tail(Gold.prices$Date, 1))
negative.sections <- data.frame(start=negative.starts, end=negative.ends, group=seq_along(negative.starts))
ggplot(data=Gold.prices, aes(Date, USD)) +
theme_minimal() +
geom_line(color = "gold") +
geom_rect(data=positive.sections, inherit.aes=FALSE, aes(xmin=start, xmax=end, ymin=min(Gold.prices$value),
ymax=max(Gold.prices$value), group=group), color="transparent", fill="green", alpha=0.4)+
geom_rect(data=negative.sections, inherit.aes=FALSE, aes(xmin=start, xmax=end, ymin=min(Gold.prices$value),
ymax=max(Gold.prices$value), group=group), color="transparent", fill="red", alpha=0.4)
Podsumowanie ilości dni dobrych do kupna oraz sprzedaży złota.
positive.exposition.count <- data.frame(positive.exposition) %>%
filter(positive.exposition > 0) %>%
count() %>%
rename("positive exposition" = n)
negative.exposition.count <- data.frame(negative.exposition) %>%
filter(negative.exposition > 0) %>%
count() %>%
rename("negative exposition" = n)
data.frame(positive.exposition.count, negative.exposition.count) %>%
print
## positive.exposition negative.exposition
## 1 2646 1498
Liczba miejsc do kupna jest znacznie wieksza niż miejsc do sprzedaży. Jest to spowodowane tym, że wzrost wartości złota jest powolny, a spadki szybkie.
Obliczanie średnich kroczących:
df.to.model <- Gold.prices %>%
arrange(desc(row_number())) %>%
mutate(MA1 = SMA(USD, 1),
MA3 = SMA(USD, 3),
MA7 = SMA(USD, 7),
MA11 = SMA(USD, 11),
MA19 = SMA(USD, 19),
MA27 = SMA(USD, 27),
MA50 = SMA(USD, 50),
MA100 = SMA(USD, 100),
MA200 = SMA(USD, 200),
MA365 = SMA(USD, 365),
MA730 = SMA(USD, 730),
MA1095 = SMA(USD, 1095)
) %>%
filter_at(vars(-Date), all_vars(!is.na(.))) %>%
select(-(Date:USD), -positive.exposition, -negative.exposition) %>%
mutate(exposition = as.factor(exposition))
df.to.model <- Gold.prices %>%
arrange(desc(row_number())) %>%
mutate(MA1 = SMA(USD, 1),
MA3 = SMA(USD, 3),
MA7 = SMA(USD, 7),
MA19 = SMA(USD, 19),
MA50 = SMA(USD, 50),
MA200 = SMA(USD, 200),
MA1095 = SMA(USD, 1095)
) %>%
filter_at(vars(-Date), all_vars(!is.na(.))) %>%
select(-(Date:USD), -positive.exposition, -negative.exposition) %>%
mutate(exposition = as.factor(exposition))
head(df.to.model)
## # A tibble: 6 x 8
## exposition MA1 MA3 MA7 MA19 MA50 MA200 MA1095
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 50.8 50.5 50.2 49.6 48.8 45.0 39.9
## 2 1 51.8 51.0 50.5 49.8 48.9 45.0 39.9
## 3 1 53.9 52.2 51.1 50.0 49.0 45.1 39.9
## 4 1 53.2 53.0 51.6 50.2 49.1 45.2 39.9
## 5 1 53 53.3 51.9 50.4 49.2 45.2 39.9
## 6 1 52.8 53.0 52.3 50.6 49.3 45.3 40.0
Mmmm, jaki piękny zbiór na wykonanie modelu… już nie mogę się doczekać :3
ggplot(data=df.to.model, aes(Date)) +
theme_minimal() +
geom_line(aes(y=USD), color = "gold") +
geom_line(aes(y=MA19), color = "black") +
geom_line(aes(y=MA100), color = "black") +
geom_line(aes(y=MA365), color = "black")
Podzielenie zbioru na zbiór treningowy oraz testowy. Zbiór nie jest dzielony w sposób losowy, aby zbiór testowy nie był podobny do zbioru treningowego.
training_set_percentage <- 75
training <- df.to.model[1:round(nrow(df.to.model)*training_set_percentage/100),]
testing <- df.to.model[-(1:round(nrow(df.to.model)*training_set_percentage/100)),]
nrow(testing) + nrow(training) == nrow(df.to.model)
## [1] TRUE
Random forest
ctrl <- trainControl(
# powtórzona ocena krzyżowa
method = "repeatedcv",
# liczba podziałów
number = 2,
# liczba powtórzeń
repeats = 5)
fit <- train(exposition ~ .,
data = training,
method = "rf",
trControl = ctrl,
# Paramter dla algorytmu uczącego
ntree = 10)
fit
## Random Forest
##
## 9368 samples
## 7 predictor
## 3 classes: '-1', '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (2 fold, repeated 5 times)
## Summary of sample sizes: 4685, 4683, 4684, 4684, 4683, 4685, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9680829 0.9299831
## 4 0.9700682 0.9345880
## 7 0.9691502 0.9326470
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 4.
rfClasses <- predict(fit, newdata = testing)
confusionMatrix(data = rfClasses, testing$exposition)
## Confusion Matrix and Statistics
##
## Reference
## Prediction -1 0 1
## -1 0 2 0
## 0 599 1829 693
## 1 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.5857
## 95% CI : (0.5681, 0.603)
## No Information Rate : 0.5863
## P-Value [Acc > NIR] : 0.5366
##
## Kappa : -9e-04
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: -1 Class: 0 Class: 1
## Sensitivity 0.0000000 0.9989 0.0000
## Specificity 0.9992076 0.0000 1.0000
## Pos Pred Value 0.0000000 0.5860 NaN
## Neg Pred Value 0.8080743 0.0000 0.7781
## Prevalence 0.1918028 0.5863 0.2219
## Detection Rate 0.0000000 0.5857 0.0000
## Detection Prevalence 0.0006404 0.9994 0.0000
## Balanced Accuracy 0.4996038 0.4995 0.5000
Wagi dla klas
model_weights <- as.numeric(training$exposition)
model_weights[training$exposition == -1] <- (1/table(training$exposition)[1]) * 0.5
model_weights[training$exposition == 0] <- (1/table(training$exposition)[2]) * 0.1
model_weights[training$exposition == 1] <- (1/table(training$exposition)[3]) * 0.4
ctrl <- trainControl(
# powtórzona ocena krzyżowa
method = "repeatedcv",
# liczba podziałów
number = 2,
# liczba powtórzeń
repeats = 5,
classProbs = TRUE)
weighted_fit <- train(exposition ~ .,
data = training,
method = "gbm",
verbose = FALSE,
weights = model_weights,
metric = "ROC",
trControl = ctrl)
fit <- train(exposition ~ .,
data = training,
method = "gbm",
trControl = ctrl,
weights = model_weights,
# Paramter dla algorytmu uczącego
ntree = 10)
fit
rfClasses <- predict(fit, newdata = testing)
confusionMatrix(data = rfClasses, testing$exposition)